Loading All R Packages
library(scales);library("plotrix");library(Maaslin2); library(ggtext);library(lavaan);library(psych);library("xlsx");library(lsr);library(quantreg);library("readxl")
library(semPlot);library(mediation);require(lme4);require(reshape2);library(mlma);library(binilib);library(plyr);library("viridis");library(RColorBrewer)
#library(mma);# library(d3heatmap);
library(magrittr); library(ggplot2);library( "censReg" );library(ggsankey);library(dplyr);library(tidyverse);library(dmetar);library(meta);library(ggforestplot);
## Warning, do not add: 'library(forestplot)'! It is not working with ggforestplot... (9.9.24)
library(mdatools);library(circlize);library(igraph);library('bigsnpr');library(rcompanion);library(scRNAseq);library(tibble);library(stringr);library(MOFA2);library('qpgraph')
#ok
library("grid"); library("ggplotify");library(ggpubr);library(rstatix);library(datarium);library(RColorBrewer); library(ggh4x); library(effsize)
library(chorddiag);library(corrplot);library(scater);library(mdatools);options(scipen = 999); library(car);library(FSA);library(pathviewr);library(glmnet)
library("lmtest");library(PerformanceAnalytics);library(psych);library("readxl");library(ggforce);library(ComplexHeatmap)
#These are ok to drive in start:
library('Hmisc');library(correlation);library(ggcorrplot);library(pheatmap);library(mgcv);library('ppcor')
library(rmdformats);library(prettydoc);library(hrbrthemes);library(tint);library(tufte)
# library(ComplexHeatmap); # library("heatmaply")
library(extrafont)Setting Global Variables
Importing Data and Metadata
#First set your data folder:
setwd("C:/Users/patati/Desktop/TurkuOW/RWork")
#And then load the primary steroid data
NAFLD=read_excel("NAFLD_SteroidStudy.xlsx",sheet = "LFAT_steroidsDATA") #l ei tästä
oknames=colnames(NAFLD); NAFLD=data.frame(NAFLD)
#The names of the steroid groups need to be imported early on:
groups=read.csv("groups_17823.csv", header = TRUE, sep=";")
groups=groups[,c('Group','Abbreviation')]
groups=groups[groups[,'Abbreviation']!='F',]
groups=groups[order(groups[,'Group']),]
#P4 was found from elsewhere to have the following characteristics:
NAFLD[,'P4'] = as.numeric(NAFLD[,'P4'])
NAFLD[,'P4'][is.na(NAFLD[,'P4'])] = 22557.3330346846#median(NAFLD[,'P4'], na.rm=TRUE)
NAFLD[,5:7][NAFLD[,5:7]==0.01]=0; colnames(NAFLD)=oknames
MASLD=read_excel("Combined.Matrix.For.Pauli.2023.10.17.Excel.Formatv2.xlsx") #vaan tästä!
oknames=colnames(MASLD); MASLD=data.frame(MASLD); colnames(MASLD)=oknames #Tätä kikkojen määrää...
rownames(MASLD)=MASLD[,1]
MASLD[,'P4'] = as.numeric(MASLD[,'P4']) #The same comment as above
MASLD[,'P4'][is.na(MASLD[,'P4'])] = 22557.3330346846
eva=c('Grade(0-3)', 'Stage(0-4)','Necroinflammation')
MASLD[,eva][MASLD[,eva]==0.01]=0; #MASLD[,eva]
td=c('11-KDHT','AN','DHT','17a-OHP5','E2','P5','DOC')
val=c(103,252,51,200,26.5,253,10); vale=c(100,250,50,200,25,250,10)#MASLD[1:30,td]
for (i in 1:7) {MASLD[,td][i][MASLD[,td][i]==val[i]]=vale[i]} #MASLD[1:30,td] # tu=c('E','11-KA4') # val=c(106000) # vale=c(100) # MASLD[,tu]
# These (E) are ok as per lab:
ME=read.csv('E_tikka231023.csv',header=TRUE, sep=";")
ME2=rownames(MASLD[MASLD[,'E']==106000,])
to=ME[which(ME[,1] %in% ME2),'patient.number']
te=ME[which(ME[,1] %in% ME2),'E']
MASLD[as.character(to),'E']=te
#These (11-KA4) will change in the lab (sometime after 24.10.23):
M11=read.csv('11KA4_tikka231023.csv',header=TRUE, sep=";")
M11[,1][c(1:5,9)];MASLD[as.character(M11[,1][c(1:5,9)]),'11-KA4'] #these were denoted with 'big interference'## [1] 24148220513 24127060213 24112081112 2457090909 1136130306 2470281009
## [1] 146616.061 169207.805 7726.399 4434.397 2958.374 1884.497
MASLD[as.character(M11[,1][c(1:5,9)]),'11-KA4'] = NA#median(MASLD[!rownames(MASLD) %in% as.character(M11[,1][c(1:5,9)]),'11-KA4'])
a=MASLD[order(MASLD[,'BMI']),'BMI']
b=NAFLD[order(NAFLD[,'BMI']),'BMI']
them=unique(b[! b %in% a])
NAFLD=NAFLD[order(NAFLD[,'BMI']),]
NAFLD=NAFLD[NAFLD[,'BMI']!=them,]
MASLD=MASLD[order(MASLD[,'BMI']),]
#https://appsilon.com/imputation-in-r/ #https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
#New data import withouth changing the conames: https://readxl.tidyverse.org/articles/column-names.html
Bali=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Liver_BA",.name_repair = "minimal")); row.names(Bali)=Bali[,1]
Pfase=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "PFAS_serum",.name_repair = "minimal")); rownames(Pfase)=as.vector(unlist(Pfase[,1]))
Base=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "Serum_BA",.name_repair = "minimal"));rownames(Base)=as.vector(unlist(Base[,1]))
C4=data.frame(read_excel("Liver_bile_acids_PFAS.xlsx",sheet = "C4",.name_repair = "minimal")); rownames(C4)=as.vector(unlist(C4[,1]))
Clini=data.frame(read_excel("Matching clinical data_all.xlsx",sheet = "Sheet1",.name_repair = "minimal")); rownames(Clini)=as.vector(unlist(Clini[,1]));
# https://www.analyticsvidhya.com/blog/2021/06/hypothesis-testing-parametric-and-non-parametric-tests-in-statistics/
head(MASLD) #head(NAFLD);## PatientNumber E 11-KT 17a-OHP5 S B
## 24145190413 24145190413 47616.43 1013.1342 5023.952 754.5581 8580.997
## 2459090909 2459090909 44356.73 935.1387 1111.426 569.7927 6167.239
## 24102261010 24102261010 47984.26 1118.2585 4507.814 602.4448 15640.594
## 24152080813 24152080813 39935.60 941.8854 2129.994 1249.6505 6494.666
## 2465141009 2465141009 52673.05 1771.0763 7279.948 1043.9712 11716.736
## 24139220313 24139220313 60462.55 1004.7332 1875.432 3096.4376 20529.335
## 11b-OHA4 11-KDHT 11-KA4 DHEA T/Epi-T 17aOH-P4
## 24145190413 13735.454 100.0000 1833.115 12456.910 912.9862 1095.8051
## 2459090909 4709.126 100.0000 1004.484 5890.108 10091.7241 864.5884
## 24102261010 10061.662 100.0000 1287.833 15396.214 887.8295 2070.0143
## 24152080813 9409.882 100.0000 1378.703 5518.061 18375.4315 1718.2846
## 2465141009 21217.964 364.5037 2535.936 37500.567 2161.8853 2660.5894
## 24139220313 8241.221 100.0000 1445.514 11060.655 782.8436 1057.1951
## AN DHT A4 DOC P5 P4
## 24145190413 250.0000 50.0000 3065.905 58.15483 2375.196 270.70746
## 2459090909 574.9827 471.7821 1683.676 53.65145 1583.961 85.07423
## 24102261010 645.6174 188.7098 2795.175 66.57890 4172.336 1002.12828
## 24152080813 774.8066 886.5999 2300.818 74.36271 1216.956 90.69087
## 2465141009 1205.9718 359.8876 6157.676 123.53205 5012.128 11234.14780
## 24139220313 250.0000 204.5503 2518.245 215.88108 2384.332 111.49065
## A E2 E1 PFHpA PFHxS PFNA
## 24145190413 516.4204 864.33681 493.2497 0.03923419 0.02715859 0.01086114
## 2459090909 752.5202 60.08170 139.6349 0.12359324 0.07177058 0.04601904
## 24102261010 1413.2715 846.67184 363.6963 0.01848635 0.01765445 0.02409549
## 24152080813 2090.5190 87.28265 176.9886 0.08038657 0.05858428 0.03154907
## 2465141009 1054.2706 430.63324 422.7767 0.02749008 0.02685548 0.04712376
## 24139220313 2640.7874 65.88912 242.5530 0.05071257 0.03952623 0.02484828
## PFOA PFOS PFAS GENDER Fat_quartiles AGE BMI
## 24145190413 0.08592297 0.05282948 0.2160064 1 3 45 33.76327
## 2459090909 0.24675879 0.47560478 0.9637464 2 1 43 34.02500
## 24102261010 0.09258503 0.09897489 0.2517962 1 3 46 36.03472
## 24152080813 0.09929495 0.17347382 0.4432887 2 1 62 36.43884
## 2465141009 0.09445215 0.13267408 0.3285956 1 1 29 36.47283
## 24139220313 0.14483055 0.10067028 0.3605879 1 2 50 37.49664
## FPG FPI CPEP HBA1C TG CHOL HDL LDL AST ALT ASAT/ALAT GGT Afos
## 24145190413 5.6 6.5 1.07 5.7 1.77 5.1 0.78 3.8 27 20 1.35 42 45
## 2459090909 5.6 10.9 0.88 6.2 2.16 5.4 0.97 4.0 25 26 0.96 37 85
## 24102261010 5.5 12.3 0.87 5.7 0.99 4.1 0.97 2.6 26 28 0.93 23 53
## 24152080813 6.0 8.4 0.92 5.7 1.45 2.8 1.11 1.2 29 20 1.45 25 53
## 2465141009 4.6 6.5 0.56 5.2 1.06 4.4 1.36 2.7 38 32 1.19 14 56
## 24139220313 4.9 7.5 0.88 5.5 0.87 6.0 1.61 4.4 39 24 1.63 29 89
## KREA Alb Lfat macro% Lfat micro% Grade(0-3)
## 24145190413 69 41.5 30.00 10.00 1
## 2459090909 83 37.299999999999997 0.01 0.01 0
## 24102261010 54 37.4 30.00 20.00 0
## 24152080813 58 39.4 5.00 10.00 0
## 2465141009 75 36.799999999999997 0.01 0.01 0
## 24139220313 65 40.4 20.00 10.00 0
## Stage(0-4) Necroinflammation DM Cer CE DG Hexcer
## 24145190413 2 1 2 22.97568 3.742173 3.656996 5.050135
## 2459090909 0 0 1 27.85512 3.522023 6.187968 8.000605
## 24102261010 0 0 2 25.71246 5.539332 7.168772 5.064414
## 24152080813 0 0 2 28.94434 3.753806 4.298263 8.161690
## 2465141009 0 0 2 17.96259 2.989326 3.682140 7.663703
## 24139220313 0 0 1 42.28834 3.936571 9.076021 5.690517
## LPC PC PC_O PE PI SM TG_SFA
## 24145190413 5.473749 69.14809 4.794285 41.22809 7.488177 18.55725 23.09330
## 2459090909 9.427624 100.82188 8.359673 58.47346 8.020518 22.52707 162.87741
## 24102261010 4.957438 91.34190 5.092107 46.50882 7.468343 21.71268 59.25330
## 24152080813 5.336822 81.92883 5.880665 49.37107 8.447411 26.97900 19.01253
## 2465141009 4.872248 99.88673 5.703837 52.57543 8.449404 16.33802 137.07321
## 24139220313 4.898084 92.01513 6.378002 45.40958 7.227134 21.17787 81.30240
## MUFA TG_PUFA Total_TG
## 24145190413 12.04360 26.78956 61.92646
## 2459090909 30.08698 18.46777 211.43216
## 24102261010 64.18440 77.35989 200.79759
## 24152080813 15.00187 33.20077 67.21517
## 2465141009 20.86866 22.91724 180.85911
## 24139220313 33.98135 19.80058 135.08433
#The below ordering needs to be changed...
Bali=Bali[as.character(MASLD$PatientNumber),];Bali[1:3,1:12] #https://stackoverflow.com/questions/54264980/r-how-to-set-row-names-attribute-as-numeric-from-character I did otherway around## PatientNumber CA CDCA DCA LCA UDCA
## 24145190413 24145190413 6.500727 30.82903 18.416920 80.34275 35.334259
## 2459090909 2459090909 7.507067 28.13308 16.789504 76.62429 6.387519
## 24102261010 24102261010 13.503284 39.76252 9.959156 89.36230 23.349181
## GCA GCDCA GDCA GDHCA GHCA GHDGA
## 24145190413 6928.07 27816.43 6324.72401 1.761993 30.25704 1.152694
## 2459090909 18653.05 21291.73 9430.65918 1.938591 85.27624 6.228229
## 24102261010 19576.52 16703.54 15.05147 1.777252 17.07526 1.000000
## row.identity..main.ID. X7.oxo.DCA CA CDCA DCA
## 24145190413 24145190413 0.07555178 0.1944944 2.673152 3.0075317
## 2459090909 2459090909 0.04928836 1.4589720 1.539261 0.9155837
## 24102261010 24102261010 0.07837153 0.3214637 1.507156 4.5696464
## GCA GCDCA GDCA GHCA GHDCA GLCA
## 24145190413 0.3141635 2.3345708 0.4850412 0.008370806 0.041274101 0.05434129
## 2459090909 0.6766498 0.9899164 0.5851593 0.016345058 0.031756517 0.08623496
## 24102261010 0.2827150 1.0604713 1.0598586 0.018827447 0.003505865 0.04046934
## GUDCA
## 24145190413 2.4912628
## 2459090909 0.3670994
## 24102261010 1.5968619
## PatientNumber fP.GLUC.0. P.GLUC..30.. P.GLUC..2h. fS.INS INS..30..
## 24145190413 24145190413 5.6 8.6 7.5 6.5 35.8
## 2459090909 2459090909 5.6 8.2 5.4 10.9 118.0
## 24102261010 24102261010 5.5 8.7 7.2 12.3 79.3
## INS..2h. Matsuda_0_30_120 Matsuda_0_120 GENDER AGE HGHT
## 24145190413 43.9 92.54164 91.34534 1 45 1.75
## 2459090909 62.6 51.65286 69.61594 2 43 1.78
## 24102261010 84.4 47.66210 49.32063 1 46 1.66
## Sample.ID C4
## 24145190413 24145190413 62.47535
## 2459090909 2459090909 47.76532
## 24102261010 24102261010 65.98880
## Patient.ID Benzylparaben Methylparaben
## 24145190413 24145190413 0.7051692 0.06670446
## 2459090909 2459090909 0.4099753 0.03094425
## 24102261010 24102261010 0.5434552 0.20578393
## Perfluorodecyl.ethanoic.acid PFHpA PFHxA PFHxA.1
## 24145190413 0.05383560 0.03923419 0.3198820 6.445621
## 2459090909 0.04050537 0.12359324 0.2237116 7.254053
## 24102261010 0.03777816 0.01848635 0.1774767 2.979016
## PFHxS PFNA PFOA PFOS Benzylparaben.1
## 24145190413 0.02715859 0.01086114 0.08592297 0.05282947 49.61277
## 2459090909 0.07177058 0.04601904 0.24675878 0.47560478 20.49539
## 24102261010 0.01765444 0.02409549 0.09258503 0.09897489 32.79873
#Menopause markers:
menopause=read_excel("Putative_metabolic_markers_menopause.xlsx",sheet='menopause markers',.name_repair = "minimal"); #rownames(Clini)=as.vector(unlist(Clini[,1]));
menopause=menopause[8:dim(menopause)[1],]; menopause=menopause[,-15]; menopause[2,2:14]=menopause[1,2:14]; menopause=data.frame(menopause); menopause[2,13:14]=c('v1','v2'); #dim(menopause)
colnames(menopause)=c('row_names',menopause[2,2:dim(menopause)[2]]); menopause=menopause[3:dim(menopause)[1],];rownames(menopause)=as.vector(unlist(menopause[,1]));
menopause=menopause[as.character(MASLD$PatientNumber),]
colnames(Pfase)[colnames(Pfase)=='PFHxA.1']='PFHxA_Branched'
Pfase=Pfase[,colnames(Pfase)!='Benzylparaben.1']
Pfase[Pfase[,'Benzylparaben']>10,'Benzylparaben']=NA #m
Jeihou=data.frame(read_excel("Copy of BA_liverfat_RawData.xls",.name_repair = "minimal")); row.names(Jeihou)=Jeihou[,1];Jeihou=Jeihou[as.character(MASLD$PatientNumber),]
u=Jeihou[Jeihou[,'GHDGA']=='<LLOQ',1]; a=u[!is.na(u)]; b=rownames(Bali[Bali[,'GHDGA']==1,]); #length(b) length(a);intersect(a,b);
uu=Jeihou[Jeihou[,'GHDGA']=='No Result',1]; aa=uu[!is.na(uu)]; #intersect(aa,b); c(aa,a)[!c(aa,a) %in% b] #24140250313 24112081112 #2/25
Bali[as.character(a),'GHDGA']=min(Bali[,'GHDGA'],na.rm=TRUE)/2
heps=Bali[Bali[,'GHDGA']==1,1] #2476250110 2487010610 24111141210
Bali[as.character(heps),'GHDGA']=NA
#https://www.datasciencemadesimple.com/get-minimum-value-of-a-column-in-r-2/?expand_article=1
mat=Bali[,c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')]
mat[!mat>1]=10000
mat[mat==2]=10000 #colmins ei toiminuyt ja käytin:
hip=do.call(pmin, lapply(1:nrow(mat), function(i)mat[i,])) #https://stackoverflow.com/questions/13676878/fastest-way-to-get-min-from-every-column-in-a-matrix
hou=c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')
for (i in 1:5) {Bali[Bali[,hou[i]]==1,hou[i]]=hip[i]}
for (i in 1:5) {Bali[Bali[,hou[i]]==2,hou[i]]=hip[i]}
# hepsa=Bali[Bali[,c('TbMCA','ToMCA','TDCA','TDHCA','TLCA')]==1,1] #2476250110 2487010610 24111141210
#An imputation for the missing values:
C4[is.na(C4[,2]),2]=median(C4[!is.na(C4[,2]),2]) #assuming that these were not below quantitation and replacing with median
#https://www.geeksforgeeks.org/performing-logarithmic-computations-in-r-programming-log-log10-log1p-and-log2-functions/# https://stackoverflow.com/questions/50476717/i-want-to-align-match-two-unequal-columns
#Matching two unequal columns..# #match the names of one original column (dat2) to ones that are missing (dat1 with to other) # #Not sure if this should be this difficult...
tv=cbind(MASLD[,1],NAFLD[,2:7],Clini[,'HOMA.IR'],MASLD[,colnames(NAFLD[,8:27])],Bali[,2:dim(Bali)[2]], C4[,2:dim(C4)[2]],Base[,2:dim(Base)[2]],Pfase[,(2:(dim(Pfase)[2]))], MASLD[,'PFAS']);#colnames(tv)#,C4[,2:dim(C4)[2]]). Clini[,'HOMA-IR'] # head(tv) #non nans # which(is.na(tv)) # MASLD[1:30,1:12] # NAFLD[1:30,7:20]
colnames(tv)[colnames(tv)=='C4[, 2:dim(C4)[2]]']='C4';colnames(tv)[colnames(tv)=='Clini[, \"HOMA.IR\"]']='HOMA-IR'
colnames(tv)[colnames(tv)=='MASLD[, \"PFAS\"]']='PFAS';
colnames(tv)[colnames(tv)=="MASLD[, 1]" ]='PatientNumber';#colnames(tv)#
rownames(tv)=unlist(Bali[,1]); #tv[1:5,1:11];#tv[1:5,12:55]; dim(tv[1:3,9:28]);tv[1:5,1:80]
hep=colnames(tv)[!colnames(tv) %in% c( "Benzylparaben" ,"Methylparaben")]
#not sure when it is the best time to take not needed variables away, perhaps at the very end?
tv=tv[,hep]
tv=cbind(tv,MASLD[,(dim(MASLD)[2]-13):dim(MASLD)[2]])
# here I add the lipids. In the future, I need to divide all the groups in their own components e.g. dataframe called 'lipids' so
# that adding them will be more straighfoward
# head(tv) #non nans , ok colnames # which(is.na(tv))
tve=tv[,2:dim(tv)[2]]; tve[tve == 0] <- NA;
#print(tve, max=nrow(tve)*ncol(tve)); note, here the covariates will be been scaled, since this yield later results (as seen with correlation plots and maps)
tv_half <- tve %>% mutate(replace(., is.na(.), min(., na.rm = T)/2)) #https://mdatools.com/docs/preprocessing--autoscaling.html
tv_half_log2 <- log2(tv_half);
# print(tv_half_log2, max=nrow(tv_half_log2)*ncol(tv_half_log2))
tv_auto <- prep.autoscale(tv_half_log2, center = TRUE, scale = TRUE);
# head(tv_auto) #non nans # which(is.na(tv_auto))
# Usually this should be the log2 value 'tv_half_log2' &
#https://svkucheryavski.gitbooks.io/mdatools/content/preprocessing/text.html
# Necroinflammation HOMA-IR Steatosis.Grade.0.To.3 Fibrosis.Stage.0.to.4
tv_all=cbind(tv[,1],tv_auto); #tv_all[1:5,1:11]; note, here the covariates have not been normalized or scaled/elaborated in any way; maybe I need to do so (1/324 or 28524...); check 27524 the
x1=colnames(tv_all[,c(1:8)]); v2=dim(NAFLD)[2]+1
x2=colnames(tv_all[,9:v2]);v3=(dim(Bali)[2]+v2);x3=colnames(tv_all[,(v2+1):(v3)]);v4=(dim(Base)[2])+v3
x4=colnames(tv_all[,(v3+1):(v4-1)]);x5=colnames(tv_all[,(v4):(dim(tv_all)[2])]);
x3 <- paste(x3, "_L", sep="") #https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #dividing to lipids
x5=x5a #making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(tv_all)=nm; #tv_all[1:5,1:30]; #NAFLD[1:2,1:28];
colnames(tv_all)[colnames(tv_all)=='MASLD[, \"PFAS\"]']='PFAS';
# head(tv_all) #non nans # which(is.na(tv_all)) # colnames(tv_all)
# jälkeenpäin lienee jeesh
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
tv_all=tv_all[,!colnames(tv_all) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]
tv_half_log22=cbind(tv[,1],tv_half_log2);
# tv_half_log22=cbind(tv[,1:8],tv_half_log2);
x1=colnames(tv_half_log22[,c(1:8)]); v2=dim(NAFLD)[2]+1
x2=colnames(tv_half_log22[,9:v2]);v3=(dim(Bali)[2]+v2);
x3=colnames(tv_half_log22[,(v2+1):(v3)]);v4=(dim(Base)[2])+v3
x3=x3[c(length(x3),1:(length(x3)-1))]
x4=colnames(tv_half_log22[,(v3+1):(v4-1)]);
x5=colnames(tv_half_log22[,(v4):(dim(tv_half_log22)[2])]);
x3 <- paste(x3, "_L", sep="") #https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
x4=gsub("(-[0-9]*)*.1", "", x4) #https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
x4 <- paste(x4, "_S", sep="")# https://rdrr.io/bioc/qpgraph/man/qpNrr.html
x5a=x5[1:9]
x6=x5[10:length(x5)] #dividing to lipids
x5=x5a #making sure that PFAS are separate
nm = c(x1,x2,x3,x4,x5,x6); nm=c('PatientNumber','Gender','AGE','BMI','Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR',nm[9:length(nm)])
colnames(tv_half_log22)=nm; #tv_half_log22[1:5,1:30]; #NAFLD[1:2,1:28];
colnames(tv_half_log22)[colnames(tv_half_log22)=='MASLD[, \"PFAS\"]']='PFAS';
# colnames(tv_half_log22)
#jälkeenpäin lienee jeesh
x5=x5[x5!='PFAS'];x5=x5[x5!='Perfluorodecyl.ethanoic.acid']; x6=x6[x6!='Total_TG'] # x1;x2;x3;x4;x5;
tv_half_log22=tv_half_log22[,!colnames(tv_half_log22) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]
colnames(tv)[colnames(tv)=='17aOH-P4']='17a-OHP4'
colnames(tv_half_log22)[colnames(tv_half_log22)=='17aOH-P4']='17a-OHP4'
colnames(tv_all)[colnames(tv_all)=='17aOH-P4']='17a-OHP4'
Treatment=colnames(tv_all)[71:77];
Mediator=colnames(tv_all)[9:28];
Outcome=colnames(tv_all)[c(29:51,78:90)]; ##https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
Treatment=Treatment[!Treatment %in% c('Perfluorodecyl.ethanoic.acid')]
tv_all=tv_all[,!colnames(tv_all) %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
tv_all=tv_all[,!colnames(tv_all) %in% x4]
Outcome=Outcome[!Outcome %in% c('Total_TG','PFAS','Perfluorodecyl.ethanoic.acid')]
Outcome=Outcome[! Outcome %in% x4] #https://sparkbyexamples.com/r-programming/r-remove-from-vector-with-examples/
Mediator[Mediator=="17aOH-P4"]="17a-OHP4"
tv_covscl=tv_all
tv_covNS=cbind(tv[,1:8],tv_all[,9:dim(tv_all)[2]])
tv_LOG_covscl=tv_half_log22
tv_LOG_covNS=cbind(tv[,1:8],tv_half_log22[,9:dim(tv_half_log22)[2]])
colnames(tv_covNS)[1:8]=colnames(tv_all)[1:8]
colnames(tv_LOG_covNS)[1:8]=colnames(tv_all)[1:8]
tv_c=tv_covscl #cbind(tv[,1:8], tv_half_log2) #check also not logged and then the auto
# https://stackoverflow.com/questions/6984796/how-to-paste-a-string-on-each-element-of-a-vector-of-strings-using-apply-in-r
# https://stackoverflow.com/questions/18997297/remove-ending-of-string-with-gsub
# https://rdrr.io/bioc/qpgraph/man/qpNrr.htmlMaking Boxplots
#https://r-graph-gallery.com/265-grouped-boxplot-with-ggplot2.html
#https://stackoverflow.com/questions/53724834/why-does-the-plot-size-differ-between-docx-and-html-in-rmarkdownrender
boxplots=function(tvt,Group,Outcome,Out,oute,other) {
# tvt=ie
if (Group=='Male') {tvt=tvt[tvt[,'Gender']==1,]} else if (Group=='Female')
{tvt=tvt[tvt[,'Gender']==0,]} else if (Group=='All') {tvt=tvt}
Steroid=rep(colnames(tvt[,9:28]), each=dim(tvt)[1])
data2=rep('Control',dim(tvt)[1])
num=min(tvt[,Outcome])
if (Outcome=='HOMA-IR') {num=1.5}
data2[tvt[,Outcome]>num]='Case' #'Steatosis.Grade.0.To.3' #
Treatment=data2
note=unlist(tvt[,9:28])
Concentration=as.vector(note)
data=data.frame(Steroid, Treatment , Concentration)
data[,'Group'] = 0
# data$Steroid #check the Xs etc out..
data$Steroid [data$Steroid == '17aOH-P4']='17a-OHP4'
# groups$Abbreviation[groups$Abbreviation == '17a-OHP4']='17aOH-P4'
for (i in 1:21) {data[data$Steroid %in% groups$Abbreviation[i],'Group']=groups$Group[i]}
title = paste(Out,"'s Effect to Concentrations of Steroids", ' in ',Group,sep="")
if (Group=='Male') {lep=theme(legend.position = "none")} else if (Group=='Female')
{lep=theme(legend.position = "none")} else if (Group=='All') {lep=theme_classic2()+theme(axis.text.x=element_text(angle=90,hjust=0.95,vjust=0.2,size = 14))}
# lep=theme(legend.position = "none")
if (num==1.5) {e1=paste('Case (>',num,')',sep="");e2=paste('Control (=',num,')',sep="")} else {e1=paste('Case (>',0,')',sep="");e2=paste('Control (=',0,')',sep="")}
data=data[!is.na(data$Concentration),]
# grouped boxplot: https://stackoverflow.com/questions/32539222/group-boxplot-data-while-keeping-their-individual-x-axis-labels-in-ggplot2-in-r
p=ggplot(data, aes(x=Steroid, y=Concentration, fill=Treatment))+
geom_boxplot(notch=F, notchwidth=0.5,outlier.shape=1,outlier.size=2, coef=1.5)+
# coord_cartesian(ylim = c(0, 60000))+
theme(axis.text=element_text(color="black"))+
theme_classic2()+#https://stackoverflow.com/questions/34522732/changing-fonts-in-ggplot2
# scale_x_discrete(guide = guide_axis(angle = 90))+ https://stackoverflow.com/questions/37488075/align-axis-label-on-the-right-with-ggplot2
theme(axis.text.x=element_text(angle=90,hjust=0.95,vjust=0.2,size = 14))+#annotate(geom="text",family="Broadway",size=20)+#annotate(geom="text",family="Calibri",size = 14)+
theme(panel.grid.minor=element_blank())+ #http://www.sthda.com/english/wiki/ggplot2-rotate-a-graph-reverse-and-flip-the-plot
labs(size= "Type",x = "Steroids",y = "Log2 of Picomolar Concentrations ", title=title,size = 14)+ #log2 Autoscaled
scale_fill_manual(values=c("orange","blue"),name=oute,labels=c(e1,e2))+ #abels=c("Case (>5)", "Control (=<5)"))
#showSignificance( c(1,1), 13.5, 0, "**")+
facet_grid(~Group, scales = "free_x", space = "free")+lep+
theme(text=element_text(size=10.5,family="Calibri"), #change font size of all text
axis.text=element_text(size=14), #change font size of axis text
axis.title=element_text(size=14), #change font size of axis titles
plot.title=element_text(size=14), #change font size of plot title
legend.text=element_text(size=14), #change font size of legend text
legend.title=element_text(size=14))+theme(axis.text=element_text(color="black"))
#+annotate(geom="text",family="Broadway",size=20) # add_pval(plot, pairs = list(c(1, 2)), test='wilcox.test');plot
#https://datavizpyr.com/horizontal-boxplots-with-ggplot2-in-r/
path="C:/Users/patati/Documents/GitHub/new/" #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
pngfile <- fs::path(path,paste0(Group,Out,'boxee',".png"))#fs::path(knitr::fig_path(), "theming2.png")
agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300,scaling = 2)
plot(p)
invisible(dev.off())
knitr::include_graphics(pngfile)
}
tv_half_log22[,'11-KA4'][tv_half_log22[,'11-KA4']==min(tv_half_log22[,'11-KA4'])]=median(tv_half_log22[,'11-KA4'])
other='23824e';ie=tv_half_log22## 'Steatosis Grade','Fibrosis Stage','Necroinflammation','HOMA-IR'
windowsFonts(A = windowsFont("Calibri (Body)"))
Outcome='Steatosis Grade';Out='Steatosis'; oute='Steatosis';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)Outcome='Fibrosis Stage';Out='Fibrosis'; oute='Fibrosis Stage';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other) Outcome='Necroinflammation';Out='Necroinflammation'; oute='Level';num=0;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)Outcome='HOMA-IR';Out='HOMA-IR'; oute='Level';num=1.5 ;Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)#check the imputed 'ie' dataframe from the beginning... as well as num #you may want to use logs...
# Group='All';boxplots(ie,Group,Outcome,Out,oute,other);Group='Female';boxplots(ie,Group,Outcome,Out,oute,other);Group='Male';boxplots(ie,Group,Outcome,Out,oute,other)
# install.packages('ggpval')
# library(superb)
# library(ggpval)Making Chord Diagrams
# First the correlations for the chord diagrams (both male and female as well as total subjects):
tv_c=data.frame(tv_c)
tv_c=tv_c[,!colnames(tv_c) %in% c('Total_TG','PFAS',"Perfluorodecyl.ethanoic.acid")]
tvf=tv_c[tv_c[,'Gender']==min(tv_c[,'Gender']),1:dim(tv_c)[2]]
tvm=tv_c[tv_c[,'Gender']==max(tv_c[,'Gender']),1:dim(tv_c)[2]]
tvtest=list(tv_c,tvf,tvm)
for (i in 1:3) {colnames(tvtest[[i]]) <- gsub("\\.", "-", colnames(tvtest[[i]]))
colnames(tvtest[[i]]) <- gsub("X11", "11", colnames(tvtest[[i]]))
colnames(tvtest[[i]]) <- gsub("X17", "17", colnames(tvtest[[i]]))
colnames(tvtest[[i]])[colnames(tvtest[[i]])=="T-Epi-T"]="T/Epi-T"
colnames(tvtest[[i]])[colnames(tvtest[[i]])=="Steatosis-Grade"]="Steatosis Grade"
colnames(tvtest[[i]])[colnames(tvtest[[i]])=="Fibrosis-Stage"]="Fibrosis Stage"
colnames(tvtest[[i]])[colnames(tvtest[[i]])=="17aOH-P4"]="17a-OHP4"
colnames(tvtest[[i]])[colnames(tvtest[[i]])=="HOMA IR"]="HOMA-IR"}
tv_c=tvtest[[1]]; tvf=tvtest[[2]]; tvm=tvtest[[3]];
x4[x4=="X7.oxo.DCA_S"]="X7-oxo-DCA_S"
dat = tv_c; dat = dat %>% select(-c('PatientNumber')) #this is quite nice way to delete columns, please remember...
resulta <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson
# intersect(colnames(resulta), rownames(resulta)) #https://stackoverflow.com/questions/45271448/r-finding-intersection-between-two-vectors
dat=tvf; dat= dat %>% select(-c('PatientNumber','Gender')) #this is quite nice way to delete columns, please remember...
resultaf <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson
dat=tvm; dat= dat %>% select(-c('PatientNumber','Gender')) #this is quite nice way to delete columns, please remember...
resultam <- (rcorr(as.matrix(dat), type = c('spearman')))$r #compare pearson
#Check the columns away
at=colnames(resulta)[1:(length(x1)-1)] #clinicals
bt=colnames(resulta)[(length(at)+1):(length(at)+length(x2))] #Steroids
ct=colnames(resulta)[(length(at)+length(bt)+1):(length(at)+length(bt)+length(x3))] #BA_l
dt=colnames(resulta)[(length(at)+length(bt)+length(ct)+1):(length(at)+length(bt)+length(ct)+length(x4))] #BA_s
et=colnames(resulta)[(length(at)+length(bt)+length(ct)+length(dt)+1):(length(at)+length(bt)+length(ct)+length(dt)+length(x5))] #PFAS: change here
ft=colnames(resulta)[(length(at)+length(bt)+length(ct)+length(dt)+length(et)+1):(length(at)+length(bt)+length(ct)+length(dt)+length(et)+length(x6))] #
atl=length(at);btl=length(bt);ctl=length(ct);dtl=length(dt);etl=length(et);ftl=length(ft)
n_level=0.2; ## muuta tätä, # hist(as.numeric(Nrr)) #https://www.geeksforgeeks.org/elementwise-matrix-multiplication-in-r/
Nrr=qpNrr(resulta, verbose=FALSE);Nrr[is.na(Nrr)]=1; cond=data.frame(as.matrix(Nrr<n_level));RN=data.frame(resulta);tes_t=cond*RN;tes_t=as.matrix(tes_t);resulta=tes_t
Nrr=qpNrr(resultaf, verbose=FALSE);Nrr[is.na(Nrr)]=1;cond=data.frame(as.matrix(Nrr<n_level));RN=data.frame(resultaf);tes_t=cond*RN;tes_t=as.matrix(tes_t);
resultaf=tes_t
Nrr=qpNrr(resultam, verbose=FALSE);Nrr[is.na(Nrr)]=1;cond=data.frame(as.matrix(Nrr<n_level));RN=data.frame(resultam);tes_t=cond*RN;tes_t=as.matrix(tes_t);
resultam=tes_t
#Now first making just the chord diagram for all samples 'resulta'
n_level=0.2;
circos.clear(); #dev.off()
vars=list(resulta)
big='Yes';title='All Variables' #
rem=x4; modi=5; colt='black'
fig_name=paste('Chord Diagrams_veee with',title,'_NRR_',n_level,date,'.png')
classes=5;
tot=rownames(resulta)[2:dim(resulta)[1]];
a=length(x1)-1;b=length(x2);c=length(x3);
d=length(x4);e=length(x5);f=length(x6);#Check inside function
range=1:(a+b+c+e+f)
if (big=='Yes') {layout(matrix(1:1, 1, 1)); genders=c('Both Genders'); colors=c('black');title='All Variables'
} else {layout(matrix(1:2, 1, 2)) ; genders= c('Female','Male');colors=c('white','black');title='Gender'}
i=1
tes_t=resulta[1:dim(resulta)[1],1:dim(resulta)[2]]
a=length(x1)-1;b=length(x2);c=length(x3);d=length(x4);e=length(x5);f=length(x6);
g1=c(rep('Clinical', a),rep('Steroids', b), rep('BA_liver', c),rep('Contaminants', e),rep('Lipids', f)) #rep('BA_serum', d)
# removing self-correlation; I wonder if there could be a better way
tes_t[1:a,1:a]=0
tes_t[(a+1):(a+b),(a+1):(a+b)]=0
tes_t[(a+b+1):(a+b+c),(a+b+1):(a+b+c)]=0
tes_t[(a+b+c+1):(a+b+c+e),(a+b+c+1):(a+b+c+e)]=0
tes_t[(a+b+c+e+1):(a+b+c+e+f),(a+b+c+e+1):(a+b+c+e+f)]=0 #if you have more groups... make this automatic, now it is not (18.1.24)
group = structure(g1, names = colnames(tes_t));#group
grid.col = structure(c(rep('blue', a),rep('red', b), rep('green', c), rep('orange', e), rep('#756BB1', f)),
names = rownames(tes_t)); #rep('black', d),
#https://www.datanovia.com/en/blog/top-r-color-palettes-to-know-for-great-data-visualization/
tes_t=tes_t[range,range];grid.col = grid.col[range] #tes_t=resulta
g <- graph.adjacency(tes_t, mode="upper", weighted=TRUE, diag=FALSE)
e <- get.edgelist(g); df <- as.data.frame(cbind(e,E(g)$weight)); #
df[,3]=as.numeric(df[, 3])
rango <- function(x){((x-min(x))/(max(x)-min(x)))*2-1} #just a function for the -1 to 1 thing..
col_fun = colorRamp2(c(min(df$V3), 0,max(df$V3)), c("blue",'white', "red"))
df=df[!df$V1 %in% rem,];df=df[!df$V2 %in% rem,] #e.g.rem=x4
for (i in 1:2) {
df[,i]= gsub("\\.", "-", df[,i])
df[,i] <- gsub("X11", "11", df[,i])
df[,i] <- gsub("X17", "17", df[,i])
df[,i][df[,i]=="T-Epi-T"]="T/Epi-T"
df[,i][df[,i]=="Steatosis.Grade"]="Steatosis Grade"
df[,i][df[,i]=="Steatosis-Grade"]="Steatosis Grade"
df[,i][df[,i]=="Fibrosis.Stage"]="Fibrosis Stage"
df[,i][df[,i]=="Fibrosis-Stage"]="Fibrosis Stage"
df[,i][df[,i]=="17aOH.P4"]="17a-OHP4"
df[,i][df[,i]=="HOMA.IR"]="HOMA-IR"}
classes=modi #modi=4
namesh=unique(g1) #[c(1:6)[1:6 != modi]];
cola=unique(grid.col)#[c(1:6)[1:6 != modi]]
lgd_group = Legend(at = 'Both Genders', type = "points", legend_gp = gpar(col = 'black'), title_position = "topleft", title = title)
lgd_points = Legend(at = namesh, type = "points", legend_gp = gpar(col = cola), title_position = "topleft", title = "Class")
lgd_lines = Legend(at = c("Positive", "Negative"), type = "points", legend_gp = gpar(col = c('red','blue')), title_position = "topleft", title = "Correlation")
lgd_edges= Legend(at = c(round(min(df$V3),1), round(max(df$V3),1)), col_fun = col_fun, title_position = "topleft", title = "Edges")
lgd_list_vertical = packLegend(lgd_group,lgd_points, lgd_lines,lgd_edges) #lgd_lines,
# length(unique(colnames(resulta)));length(unique(df[,1]));length(unique(df[,2]));dim(df); #unique(df[,1]);unique(df[,2]);sum(colnames(resulta)=='CA')
chordDiagram(df, annotationTrack = c("grid"), grid.col=grid.col, directional = FALSE,order = rownames(tes_t), preAllocateTracks = 1, col = col_fun,transparency = 0.5)
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim"); ylim = get.cell.meta.data("ylim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim), ylim[1] + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
circos.axis(h = "top", labels.cex = 0.000001, major.tick.length = 0.2, sector.index = sector.name, track.index = 2)}, bg.border = NA)
windowsFonts(A = windowsFont("Calibri (Body)"))
draw(lgd_list_vertical, x = unit(5, "mm"), y = unit(5, "mm"), just = c("left", "bottom"))# https://stackoverflow.com/questions/31943102/rotate-labels-in-a-chorddiagram-r-circlize
#And then using a function for making the female and male chord diagrams... (checking later if this can be also for all.. all had one element more, i.e. 'gender' )
#This function takes elements from: https://jokergoo.github.io/circlize_book/book/advanced-layout.html#combine-circular-plots
group_chords=function(vars,n_level,fig_name, big,rem,modi,colt,gend,colors) {
classes=5;
tot=rownames(resulta)[2:dim(resulta)[1]];
a=length(x1)-2;b=length(x2);c=length(x3);
d=length(x4);e=length(x5);f=length(x6);#Check inside function
range=1:(a+b+c+e+f)
layout(matrix(1:1, 1, 1));
title='Gender'#
genders= gend
windowsFonts(A = windowsFont("Calibri (Body)"))
i=1
tes_t=vars;
colnames(tes_t)=rownames(resultaf);rownames(tes_t)=rownames(resultaf)
a=length(x1)-2;b=length(x2);c=length(x3);d=length(x4);e=length(x5);f=length(x6);
g1=c(rep('Clinical', a),rep('Steroids', b), rep('BA_liver', c),rep('Contaminants', e),rep('Lipids', f)) #rep('BA_serum', d)
# removing self-correlation
tes_t[1:a,1:a]=0
tes_t[(a+1):(a+b),(a+1):(a+b)]=0
tes_t[(a+b+1):(a+b+c),(a+b+1):(a+b+c)]=0
tes_t[(a+b+c+1):(a+b+c+e),(a+b+c+1):(a+b+c+e)]=0
tes_t[(a+b+c+e+1):(a+b+c+e+f),(a+b+c+e+1):(a+b+c+e+f)]=0 #if you have more groups... make this automatic, now it is not (18.1.23)
group = structure(g1, names = colnames(tes_t));#group
grid.col = structure(c(rep('blue', a),rep('red', b), rep('green', c), rep('orange', e), rep('#756BB1', f)),
names = rownames(tes_t)); ##https://www.datanovia.com/en/blog/top-r-color-palettes-to-know-for-great-data-visualization/
tes_t=tes_t[range,range];grid.col = grid.col[range] #tes_t=resulta
g <- graph.adjacency(tes_t, mode="upper", weighted=TRUE, diag=FALSE)
e <- get.edgelist(g); df <- as.data.frame(cbind(e,E(g)$weight)); #
df[,3]=as.numeric(df[, 3])
rango <- function(x){((x-min(x))/(max(x)-min(x)))*2-1} #just a function for the -1 to 1 thing..
col_fun = colorRamp2(c(min(df$V3), 0,max(df$V3)), c("blue",'white', "red"))
df=df[!df$V1 %in% rem,];df=df[!df$V2 %in% rem,] #e.g.rem=x4
# df$V3=rango(df$V3);
for (i in 1:2) {
df[,i]= gsub("\\.", "-", df[,i])
df[,i] <- gsub("X11", "11", df[,i])
df[,i] <- gsub("X17", "17", df[,i]); df[,i][df[,i]=="T-Epi-T"]="T/Epi-T"
df[,i][df[,i]=="Steatosis.Grade"]="Steatosis Grade"
df[,i][df[,i]=="Steatosis-Grade"]="Steatosis Grade"
df[,i][df[,i]=="Fibrosis.Stage"]="Fibrosis Stage"
df[,i][df[,i]=="Fibrosis-Stage"]="Fibrosis Stage"
df[,i][df[,i]=="17aOH.P4"]="17a-OHP4"
df[,i][df[,i]=="HOMA.IR"]="HOMA-IR"}
classes=modi #modi=4
namesh=unique(g1) #[c(1:6)[1:6 != modi]];
cola=unique(grid.col)#[c(1:6)[1:6 != modi]]
lgd_group = Legend(at = gend, type = "points", legend_gp = gpar(col = colors), title_position = "topleft", title = title)
lgd_points = Legend(at = namesh, type = "points", legend_gp = gpar(col = cola), title_position = "topleft", title = "Class")
lgd_lines = Legend(at = c("Positive", "Negative"), type = "points", legend_gp = gpar(col = c('red','blue')), title_position = "topleft", title = "Correlation")#round(min(df$V3)), round(max(df$V3)) #round(min(df$V3)), round(max(df$V3)), -1,1
lgd_edges= Legend(at = c(round(min(df$V3),1), round(max(df$V3),1)), col_fun = col_fun, title_position = "topleft", title = "Edges")
lgd_list_vertical = packLegend(lgd_group,lgd_points, lgd_lines,lgd_edges) #lgd_lines,
length(unique(colnames(resulta)));length(unique(df[,1]));length(unique(df[,2]));dim(df); #unique(df[,1]);unique(df[,2]);sum(colnames(resulta)=='CA')
chordDiagram(df, annotationTrack = c("grid"), grid.col=grid.col, directional = FALSE,
order = rownames(tes_t), preAllocateTracks = 1, col = col_fun,transparency = 0.5)
circos.trackPlotRegion(track.index = 1, panel.fun = function(x, y) {
xlim = get.cell.meta.data("xlim"); ylim = get.cell.meta.data("ylim")
sector.name = get.cell.meta.data("sector.index")
circos.text(mean(xlim), ylim[1] + .1, sector.name, facing = "clockwise", niceFacing = TRUE, adj = c(0, 0.5))
circos.axis(h = "top", labels.cex = 0.000001, major.tick.length = 0.2, sector.index = sector.name, track.index = 2)}, bg.border = NA) #https://stackoverflow.com/questions/31943102/rotate-labels-in-a-chorddiagram-r-circlize
windowsFonts(A = windowsFont("Calibri (Body)"))
draw(lgd_list_vertical, x = unit(5, "mm"), y = unit(5, "mm"), just = c("left", "bottom"))#}
}
vars=list(resultaf,resultam); #
big='No';title='Genders Separated'; #or 'Yes' for the big plot alone
rem=x4; modi=4; colt='black';colors=c('white','black');#rem=x3; modi=5; colt='green';
gend=c('Female');colors=c('white');group_chords(vars[[1]],n_level,fig_name,big,rem,modi,colt,gend,colors) #this drives the functiongend=c('Male');colors=c('black');group_chords(vars[[2]],n_level,fig_name,big,rem,modi,colt,gend,colors) #this drives the functionMaking Variance Explained Plots
# This is it! https://bioconductor.org/packages/release/bioc/vignettes/scater/inst/doc/overview.html
#or
# https://stats.stackexchange.com/questions/79399/calculate-variance-explained-by-each-predictor-in-multiple-regression-using-r
# https://rdrr.io/github/MRCIEU/TwoSampleMR/man/get_r_from_pn.html
# https://onlinestatbook.com/2/effect_size/variance_explained.html
# https://stackoverflow.com/questions/10441437/why-am-i-getting-x-in-my-column-names-when-reading-a-data-frame
# https://stackoverflow.com/questions/27044727/removing-characters-from-string-in-r
varex_groups_plot=function(tv_all2,Group) { # if (Group=='Female') {cond=tv_all[,'Gender']==1} else if
an.error.occured=FALSE
tv_all2=tv_all
tryCatch( { tv_all2[,'Gender'] }, error = function(e) {an.error.occured <<- TRUE})
if(an.error.occured) {if (Group=='female') {cond=tv_all2[,'SEX.1F.2M']==min(tv_all2[,'SEX.1F.2M'])} else if (Group=='male')
{cond=tv_all2[,'SEX.1F.2M']==max(tv_all2[,'SEX.1F.2M'])} else if (Group=='All'){cond=rep(TRUE,dim(tv_all2)[1])}} else {
if (Group=='female') {cond=tv_all2[,'Gender']==min(tv_all2[,'Gender'])} else if (Group=='male') {cond=tv_all2[,'Gender']==max(tv_all2[,'Gender'])} else if (Group=='All')
{cond=rep(TRUE,dim(tv_all2)[1])}}
tv_red=c(); tv_red=tv_all2[cond,]
hep=tv_red
colnames(hep)[1:8]=colnames(tv_red)[1:8]
tv2=t(hep[,9:dim(tv_red)[2]])
sce=SingleCellExperiment(tv2)
logcounts(sce)=tv2
sce@colData=DataFrame(hep[,2:8]) #it is DataFrame with big Ds and Fs
names(colData(sce))=colnames(tv_all)[2:8]
vars <- getVarianceExplained(sce,variables=names(colData(sce))[1:7])
windowsFonts(A = windowsFont("Calibri (Body)"))
p=plotExplanatoryVariables(vars)
library(ragg)
# Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing
path="C:/Users/patati/Documents/GitHub/new/" #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
pngfile <- fs::path(path,paste0(Group,'vex',".png"))#fs::path(knitr::fig_path(), "theming2.png")
agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300,scaling = 2)
plot(p)
invisible(dev.off())
knitr::include_graphics(pngfile)
}
# I'll be using the tv_all (logged and then scaled) values here, even though in the original function took 'lognormed' values, i.e.
# 'logNormCounts' #tässä funktiossa ensin tehdään normeeraus ja sitten vasta log2..., ts./or:
# https://bioconductor.org/packages/devel/bioc/vignettes/SingleCellExperiment/inst/doc/intro.html
# counts <- assay(sce); # libsizes <- colSums(counts); # size.factors <- libsizes/mean(libsizes) # logcounts(sce) <- log2(t(t(counts)/size.factors) + 1)
# logcounts(sce)=scale(log2(tv2)) #depending on the order of the normalizing/scaling this will change...
# So, the way to go is however:
# https://bioinformatics.stackexchange.com/questions/22414/when-analysing-microarray-data-is-it-need-to-do-normalization-and-standardizati
# https://bioinformatics.stackexchange.com/questions/22426/inconsistent-microarray-expression-levels-after-normalizing-with-log2
# https://www.reddit.com/r/bioinformatics/comments/1ejs94m/log2_transformation_and_quantile_normalization/
varex_groups_plot(tv_all,Group='All');Making Heatmaps with Linear Model Estimates
# You may need a rather big function to calculate the estimates and plot at the same time, since the spaces of exper. interest have been reduced from the max dataset size.
# Eli maksimilla vedetään... eli pitäis olla ok, sillä skaalattu vastaa korrelaatiota tss. skaalaus vielä miehiin...
the_funal=function(tv,Group,ok,fn,adj,sig.level,sick,sick_group,joo) { # ok,aa,bb
# tv=tv_all
if (Group=='male') {NAFLDo=tv[tv[,'Gender']==max(tv[,'Gender']),]} else if (Group=='female')
{NAFLDo=tv[tv[,'Gender']==min(tv[,'Gender']),]} else if (Group=='All') {NAFLDo=tv}
SG0=NAFLDo[,c(2:dim(tv)[2])]
#https://stackoverflow.com/questions/10688137/how-to-fix-spaces-in-column-names-of-a-data-frame-remove-spaces-inject-dots
oknames=colnames(SG0)
SG0=data.frame(SG0)
colnames(SG0)
colnames(SG0[,8:27]) <- gsub("-", ".", colnames(SG0[,8:27]))
colnames(SG0[,8:27]) <- gsub("/", ".", colnames(SG0[,8:27]))
# SG0=destroyX(data.frame(SG0))
hesh=c()
xnam <- colnames(SG0)[c(4:7)]
Treatment2=Treatment
y <- Treatment2 #colnames(SG0)[c(70:76)]
hoesh=c()
j=1;i=1;p.val=c()
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
# uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
# p.val <- pf(uh[1], df1 = uh[2], df2 = uh[3],lower.tail=F)
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4]) #Suom. tää oli aikasemmin 'hösh', mutta sitten tuli ongelmia
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
rm(hoesh)
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))
}}
j=1;i=1; rm(xnam,y)
xnam <- colnames(SG0)[c(2)]
y <- Treatment2#colnames(SG0)[c(70:76)]
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] # Some pondering what to put as p values:
# uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
# p.val <- pf(uh[1], df1 = uh[2], df2 = uh[3],lower.tail=F)
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4])
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
rm(hoesh)
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
j=1;i=1; rm(xnam,y)
xnam <- colnames(SG0)[c(3)];y <- Treatment2#colnames(SG0)[c(70:76)]
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))}
#https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4])
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
rm(hoesh)
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
j=1;i=1; rm(xnam,y)
xnam <- Treatment2#colnames(SG0)[c(70:76)]#paste("x", 1:25, sep="") 28:length(colnames(SG0)
y = colnames(SG0[,8:27])
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
# if (file.exists(sub_dir)){next}
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] #
uh=c();uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4])
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
# # 1) steroids=BA/lipid
# if (Group!='All') {
j=1;i=1; rm(xnam,y)
xnam <- c(x3,x6) # Group='All'
y <- c(colnames(SG0[,8:27])); #colnames(SG0)[c(4:7)]
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
# j=10
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();
p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
uh=summary(poissone)$fstatistic # https://gettinggeneticsdone.blogspot.com/2011/01/rstats-function-for-extracting-f-test-p.html
p.vala <- pf(uh[1], df1 = uh[2], df2 = uh[3],lower.tail=F)
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4])
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
rm(hoesh)
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
# ja 2) steroid = covar
j=1;i=1; rm(xnam,y)
xnam <- c('AGE','BMI',colnames(SG0)[c(4:7)]);
y <- c(colnames(SG0[,8:27])) # Group='All'
for (i in 1:length(xnam)) {
for (j in 1:length(y)) {
if (Group!='All') {fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE'), collapse= "+")))} else if (Group=='All')
{fmla <- as.formula(paste(paste(c(y[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))} #https://stats.stackexchange.com/questions/190763/how-to-decide-which-glm-family-to-use
poissone=lm( fmla, data=SG0) # anova(poissone);# poissone
p.val=c();p.val <- anova(poissone)$'Pr(>F)'[1]
ps=summary(poissone);
pss=ps[[4]] # fmla <- as.formula(paste(paste(c(colnames(SG0[,8:27])[j]," ~ "), collapse= ""), paste(c(xnam[i],'BMI','AGE','Gender'), collapse= "+")))
hoesh=c(y[j],xnam[i],Group,pss[2,1],pss[2,4],pss[2,2])
jeps=SG0#
r=as.numeric(hoesh[4])
p=as.numeric(hoesh[5])
rsadj=as.numeric(hoesh[6])
colnames(jeps)=colnames(tv)[2:dim(tv)[2]]
Treatment=hoesh[2]
Mediator=hoesh[1]
rm(hoesh)
hesh=rbind(hesh,c(y[j],xnam[i],Group,r,pss[2,4],rsadj))}}
hesa=hesh
hoi=as.data.frame(hesh)
hopiu=hoi
colnames(hopiu)=c('y','x','Gender','r','p','var_x')
colnames(hoi)=c('y','x','Gender','r','p','var_x')
#This in case you want to print to your local computer: ... :)
# main_dir <- paste0(c("C://Users//patati//Desktop//TurkuOW//RWork//",fn),collapse="")
# setwd(main_dir)
meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
if (adj=='ok') {
# p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
hoi[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
hopiu[,5]=p.adjust(p=hopiu[,5], method = 'BH', n = length(hopiu[,5]))
}
if (ok=='big') {
rsa=c();joi=c()
# Eli 'kaksi' vielä tarvitaan...
# 1) BA/lipid=covar ja steroidit, ja 2) steroid=covar
meds=names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)]
covas=c('Steatosis.Grade','Fibrosis.Stage','Necroinflammation','HOMA.IR','AGE','BMI')
c1=hoi[,2] %in% covas
c2=hoi[,1] %in% meds
hyy=c1 & c2
m1=hoi[hyy,]
colnames(m1)=c('y','x','Gender','r','p','var_x') #c('y','x','Gender','r','p','radj')
c1=hoi[,2] %in% c(x3,x6); c2=hoi[,1] %in% meds
hyy=c1 & c2; m2=hoi[hyy,]
colnames(m2)=c('y','x','Gender','r','p','var_x') # hist(as.numeric(m2[,6]),breaks=50)
joi=rbind(m1,m2)
i=4;rs=c()
for (i in 4:5) {
rs=joi[,c(1,2,i)] # rs=data.frame(rs)
rs=reshape(rs,idvar="x",timevar="y",direction="wide")
rownames(rs)=rs[,1]
rs=rs[,-1]
library(stringr) # x1 = "aallworldpopulations"
colnames(rs)=str_sub(colnames(rs),3,-1)
colnames(rs) <- gsub("\\.", "-", colnames(rs))
colnames(rs) <- gsub("X11", "11", colnames(rs))
colnames(rs) <- gsub("X17", "17", colnames(rs))
# colnames(rs)["Steatosis-Grade"]
colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
rownames(rs)[rownames(rs)=="Steatosis.Grade"]="Steatosis Grade"
rownames(rs)[rownames(rs)=="Fibrosis.Stage"]="Fibrosis Stage"
rownames(rs)[rownames(rs)=="HOMA.IR"]="HOMA-IR"
covas[covas=="Steatosis.Grade"]="Steatosis Grade"
covas[covas=="Fibrosis.Stage"]="Fibrosis Stage"
covas[covas=="HOMA.IR"]="HOMA-IR"
heps=c(groups[,2]) #check that you have driven the steroid data vis file...
cme1=match(heps,colnames(rs))
cme2=match(c(covas,x3,x6),rownames(rs))
rs=rs[cme2,cme1]
rsa=rbind(rsa,rs) }
rs1a=rsa[1:dim(rs)[1],];
rs2a=rsa[(dim(rs1a)[1]+1):(dim(rs1a)[1]+dim(rs1a)[1]),]
rs1=rs1a;rs2=rs2a
rownames(rs2)=str_sub(rownames(rs2), end = -2)
rownames(rs1) <- gsub("\\.", " ", rownames(rs1))
rownames(rs2) <- gsub("\\.", " ", rownames(rs2))
rownames(rs1)[rownames(rs1)=="HOMA IR"]="HOMA-IR";rownames(rs2)[rownames(rs2)=="HOMA IR"]="HOMA-IR"
rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
rs1=rango(rs1,-0.5,0.5) #check this if needed
rs1=as.matrix(rs1)
rs2=as.matrix(rs2)
width=2500; height=4400
order="original"; range='orig';corre='no_renorm'; type='full'; method='color';#ga='All';gf='Female';gm='Male' #color square
cl.offset=1.0;cl.length=5;cl.cex = 0.6;pch.cex=0.6;pch=10;cl.pos = 'n';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
ho=Group;hip1='BAs_lipids_as_y vs. steroids_as_x'
# https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
# install.packages('useful')
# library(useful)
# library(ragg)
# Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing
# path="C:/Users/patati/Documents/GitHub/new/" #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
# pngfile <- fs::path(path,paste0(Group,'ee',".png")) #fs::path(knitr::fig_path(), "theming2.png")
# agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300,scaling = 2)
corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .01, .05),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length,tl.cex=0.5,
# to get the font size.... https://www.rdocumentation.org/packages/corrplot/versions/0.94/topics/corrplot
#.001, .01, .05, .01, .05, .1 .001, .01, .05, check males, .1, .05, .2
tl.srt = 90, diag = TRUE,col = rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]),is.corr = FALSE) #,is.corr = FALSE
# invisible(dev.off())
# knitr::include_graphics(pngfile)
#https://stackoverflow.com/questions/26574054/how-to-change-font-size-of-the-correlation-coefficient-in-corrplot
#https://stackoverflow.com/questions/9543343/plot-a-jpg-image-using-base-graphics-in-r
# gw=getwd()# myjpg <- paste0(gw,'/',paste("Linear Model Estimate Plot of",hip1,Group,".jpg"))# library(imager)# image <- load.image(myjpg)# plot(image)
} else {
rsa=c();rs1=c();rs2=c()
c1=hoi[,1] %in% x5
hoi[c1,] #tää tulee suoraan tällä PFAS 7:lle ekalle
c2=hoi[,1] %in% names(table(hoi[,1]))[!names(table(hoi[,1])) %in% c(x3,x5,x6)] #vaikeemman kautta
hyy=c1 & c2
hoi2=hoi[c2 | c1 ,] #tähän myoes
rownames(hoi2)=1:dim(hoi2)[1]
hoi2=hoi2[1:182,]
a=hoi2[1:42,1];b=hoi2[1:42,2]
hoi2[1:42,]=cbind(b,a,hoi2[1:42,3:5])
hoi2=hoi2[,c(2,1,3:5)]
i=4;
rse=c()
for (i in 4:5) {
rse=hoi2[,c(1,2,i)] # rs=data.frame(rs)
rse=rse[order(rse[,1]),]
rs=reshape(rse,idvar="x",timevar="y",direction="wide")
rownames(rs)=rs[,1]
rs=rs[,-1]
library(stringr) # x1 = "aallworldpopulations"
colnames(rs)=str_sub(colnames(rs),3,-1)
colnames(rs) <- gsub("\\.", "-", colnames(rs))
colnames(rs) <- gsub("X11", "11", colnames(rs))
colnames(rs) <- gsub("X17", "17", colnames(rs))
colnames(rs)[colnames(rs)=="T-Epi-T"]="T/Epi-T"
colnames(rs)[colnames(rs)=="T-E-T"]="T/Epi-T"
colnames(rs)[colnames(rs)=="Steatosis-Grade"]="Steatosis Grade"
colnames(rs)[colnames(rs)=="Fibrosis-Stage"]="Fibrosis Stage"
colnames(rs)[colnames(rs)=="17aOH-P4"]="17a-OHP4"
heps=c(covas,groups[,2])
heps <- gsub("\\.", " ", heps)
heps[heps=="HOMA IR"]="HOMA-IR"
ccc=match(heps,colnames(rs))
rs=rs[,ccc]
rsa=rbind(rsa,rs)
}
rs1a=rsa[1:7,];
rs2a=rsa[8:14,]
rs1=rs1a;rs2=rs2a
rownames(rs2)=str_sub(rownames(rs2), end = -2)
width=5500; height=2300
order="original"; range='orig';corre='no_renorm'; type='full'; method='color'; #ga='All';gf='Female';gm='Male' #color square
cl.offset=1.0;cl.length=5;cl.cex = 1.05;pch.cex=1.05;pch=20;cl.pos = 'r';#cl.pos = 'b' ;#pch.cex=0.95,1.3; height=6300; pos 'b' cl.pos = 'b'
ho=Group; hip1='Steroids_y vs. PFAS_as_x'
rs1 <- mutate_all(rs1, function(x) as.numeric(as.character(x)))
rs2 <- mutate_all(rs2, function(x) as.numeric(as.character(x)))
rs1=rango(rs1,-0.5,0.5) #check if needed
# E.g., rs1[rs1 > 0.25]=0.25 # rs1[rs1 < -0.25]=-0.25 or aa, bb
rs1=as.matrix(rs1)
rs2=as.matrix(rs2)
corrplot(rs1, type = type, order = order,method=method, p.mat=rs2, tl.col = "black", cl.cex = cl.cex,pch.cex=pch.cex,pch.col='black',pch=pch,
sig.level = c(.001, .01, .05),cl.pos = cl.pos, insig = "label_sig",cl.offset=cl.offset,cl.length=cl.length, tl.cex=0.8,
tl.srt = 90, diag = TRUE,col = rev(COL2('RdBu')[25:(length(COL2('RdBu'))-25)]),is.corr = FALSE) #,is.corr = FALSE
# # Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# # https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing
#https://www.jumpingrivers.com/blog/knitr-rmarkdown-image-size/
}
# https://scales.arabpsychology.com/stats/how-to-remove-the-last-character-from-a-string-in-r-2-examples/
return(list(hopiu))
}
# The scaling here just in case:
rango = function(x,mi,ma) {(ma-mi)/(max(x)-min(x))*(x-min(x))+mi}
#To apply to all groups at one go:
huus=function(tv,adj,sig.level,sick,sick_group,joo) {
huus=c();heijaa=c('All','female','male'); ok=c('big','small')
hyp=1;hrt=1
for (hyp in 1:2) {
for (hrt in 1:3) {
# the_funal=function(tv,Group,ok,aa,bb,fn,adj)
huus=append(huus,the_funal(tv,heijaa[hrt],ok[hyp],fn,adj,sig.level,sick,sick_group,joo))}}
return(huus)}
# Driving the function with the parameters as follows:
adj='nook'; sig.level=c(.001,0.01, 0.05); sick='no'; joo='joo' #sickGroup..
metanorm_S_non_fdr=huus(tv_all,adj,sig.level,sick,sick_group,joo)Making Causal Mediation Analysis
#The basic hypothesis. All are variables (y~x+m;m~x)
loop_med_simplified1a=function(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group) { # if (Group=='Female') {cond=tv_all[,'Gender']==1} else if (Group=='Male') # {cond=tv_all[,'Gender']==2} else if (Group=='All') {cond=rep(TRUE,dim(tv_all)[1])}
if (Group=='female') {cond=tv_all[,'Gender']==min(tv_all[,'Gender'])} else if (Group=='male') {cond=tv_all[,'Gender']==max(tv_all[,'Gender'])} else if (Group=='All') {cond=rep(TRUE,dim(tv_all)[1])}
tv_red=c();
if (sick=='yes') {tv_red=tv_all[cond & as.vector(sick_group),]} else {tv_red=tv_all[cond,]}
X <- tv_red[,Treatment] #Standard values did not five erros # hep=colnames(X)[!colnames(X) %in% c( "Benzylparaben" ,"Methylparaben")] # X=X[,hep]
M <- tv_red[,Mediator] #
Y <- tv_red[,Outcome] #"Steatosis.Grade.0.To.3" "Fibrosis.Stage.0.to.4" "Necroinflammation" "HOMA-IR"
# cova <- tv_red[,c('AGE','BMI','Gender')]
Data <- cbind(X,M,Y); ## colnames(Data)[which(names(Data) == "X")] <- "PFOA_L"
colnames(Data) <- gsub(" ", "_", colnames(Data)) # colnames(Data[,1:2])[1]=Treatment
Data=data.frame(Data)
# https://rdrr.io/cran/mlma/man/data.org.html # https://cran.r-project.org/web/packages/mma/mma.pdf
# this time the b and c are in the loop, and b is the model 'M', i.e. M~X (e.g. Allocholic acid ~ PFOA_L)
# the c is the model 'Y', i.e. Y~M+X, e.g. CAR ~PFOA + Allocholic acid (note, just one mediator at the time... )
control.value=colMins(as.matrix(X)) #test also with colMedians colMins -abs(colMins(as.matrix(X))*2
treat.value=colMaxs(as.matrix(X))
#M~X
x=c();m=c(); y=c();ye=c()
for (i in 1:length(colnames(X))) {x=append(x,paste("Data[, ",i , "]", sep=""))}
for (j in (dim(X)[2]+1):(length(colnames(M))+dim(X)[2])) {m=append(m,paste("Data[, ",j , "]", sep="")) }
#Y~X+M
for (z in (dim(M)[2]+dim(X)[2]+1):(dim(Data)[2])) {y=append(y,paste("Data[, ",z , "]", sep="")) } #this dimension was essential for the loop names
med_out=c();res=c(); tmp=c();rn=c();med_oute=c();med_sense=c();resa=c()
j=1;i=1;z=1
# simss=2; length(y)*length(m)*length(x)
for (i in 1:length(y)) {
for (j in 1:length(m)) { #control.value=mina[i]
for (z in 1:length(x)) {
fmla1 <- as.formula(paste(paste(m[j], collapse= "+")," ~ ", paste(x[z], collapse= "+")))
b = lm(fmla1, Data)
xm=paste(paste(c(x[z],m[j]), collapse= "+"))
fmla2 <- as.formula(paste(y[i]," ~ ", xm)) #https://www.statology.org/glm-vs-lm-in-r/
c = lm(fmla2, Data)
if (t.val=='no'){med_oute=mediate(b, c, treat = x[z], mediator = m[j],sims = simss)} else if (t.val=='yes') # control.value=control.value[z],treat.value=treat.value[z]
{med_oute=mediate(b, c, treat = x[z], mediator = m[j],sims = simss,control.value=control.value[z],treat.value=X[test,z] )} else if (t.val=='minmax')
{med_oute=mediate(b, c, treat = x[z], mediator = m[j],sims = simss,control.value=control.value[z],treat.value=treat.value[z] )}
med_out = summary(med_oute) #you need sims=100 min for the paper, maybe more like 1000... 10 was too little, but can get you results fast..
tmp=c(med_out$d0, med_out$d0.p, med_out$d0.ci[1],med_out$d0.ci[2],med_out$z0, med_out$z0.p, med_out$z0.ci[1],med_out$z0.ci[2],med_out$n1, med_out$n1.p,med_out$n1.ci[1],med_out$n1.ci[2],med_out$tau.coef,med_out$tau.p,med_out$tau.ci[1],med_out$tau.ci[2])
res <- rbind(res,tmp);
rn=append(rn,paste(colnames(X)[z],colnames(M)[j],colnames(Y)[i], sep=" ")) #attaching two rownames...
remove(tmp) }}} #https://intro2r.com/loops.html https://www.benjaminbell.co.uk/2022/12/loops-in-r-nested-loops.html
rownames(res)=rn #write.csv(rn,'iii.csv')
colnames(res)=c('ACME', 'd0.p', 'd0.ci_l','d0.ci_u','ADE', 'z0.p', 'z0.ci_l','z0.ci_u','Proportion Mediated', 'n1.p','n.ci_l','n1.ci_u','Total Effect','tau.p','tau.ci_l','tau.ci_u') #d0 and d1 are the same as.. 'd1', 'd1.p',
res=res[order(res[,2]),] #res=res[rev(order(res[,1])),]
rownames(res) <- gsub("X11", "11", rownames(res))
rownames(res) <- gsub("X17", "17", rownames(res))
write.xlsx(res, file = paste(name,Group,date,'.xlsx'), append = FALSE, row.names = TRUE)
#https://stackoverflow.com/questions/21847830/handling-java-lang-outofmemoryerror-when-writing-to-excel-from-r
return(res)}
#Testing hypothesis one:
the_essentials=function(Treatment, Mediator, Outcome,tv, Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip) {
hoi1=paste("C:/Users/patati/Desktop/TurkuOW/RWork/basic causal mediation with the counterfactuals/",fn,sep='')
setwd(hoi1) #https://topepo.github.io/caret/parallel-processing.html # https://ds-pl-r-book.netlify.app/optimization-in-r.html
name=paste(simss,'basic_divisions'); #sick='yes'
Group='All'; uh7ma=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group);try({uh7ma},{uh7ma=data.frame(0)}); save(uh7ma,file=paste(fn,'all.RData'));#try({uh7ma},{uh7ma=data.frame(0)})
Group='female'; uh7f=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group); try({uh7f},{uh7f=data.frame(0)});save(uh7f, file=paste(fn,'female.RData'));
Group='male'; uh7m=loop_med_simplified1a(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group);try({uh7m},{uh7m=data.frame(0)});save(uh7m, file=paste(fn,'male.RData'));
}
simss=100;name='Jaot_OK_basica'; joo='ei';ip=1
ccova=tv[,c("Steatosis.Grade.0.To.3" , "Fibrosis.Stage.0.to.4" ,"Necroinflammation" , "HOMA-IR")]
sick_group=rowSums(ccova)>4
file_names=c("Steatosis" , "Fibrosis" ,"Necroinflammation" , "HOMAIR", 'Menopause')
lkm=30; joo='ei';ip=1; sick='yes'
t.val='no'; Group='All' #name='generic';
#All;
joo='joo';sick='no'
ccova=tv[,c("Steatosis.Grade.0.To.3", "Fibrosis.Stage.0.to.4" ,"Necroinflammation","HOMA-IR")]
fn='All'; sick_group=rowSums(ccova)>4 #toth#
#Driving the analysis takes more than 1h (with a basic laptop 2023,100sims and all three groups) so drive the below only if needed:
#the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick, sick_group,fn,lkm,date,joo,ip)
# setwd("C:/Users/patati/Desktop/TurkuOW/RWork/basic causal mediation with the counterfactuals/") #check this if needed...
#In case, the 'case' sample/subject analysis would be needed:
# Steatosis
# ccovae=tv[,c("Steatosis.Grade.0.To.3")]; sick_group=ccovae>0 #toth# # hist(ccovae,breaks=100) # hist(ccova[,'HOMA-IR'],breaks=100)
# fn=file_names[1];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Fibrosis
# ccovae=tv[,c("Fibrosis.Stage.0.to.4")];
# sick_group=ccovae>0 #toth#
# fn=file_names[2];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# #Necroinfl.
# ccovae=tv[,c("Necroinflammation")]; sick_group=ccovae>0 #toth#
# fn=file_names[3];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# Homa # remember to always test the function with minimun number setting or as light parameters as possible to get it through... before big runs
# joo='ei';sick='yes'
# ccovae=tv[,c("HOMA-IR")]; sick_group=ccovae>1.5 #toth#
# fn=file_names[4];
# the_essentials(Treatment, Mediator, Outcome,tv_all,Group,name,simss,t.val,test,sick,sick_group,fn,lkm,date,joo,ip)
# Then there is also possibility for elab.Making Sankey Diagrams
#To do these diagrams, you need to have a reduced dataset as well as a function that acounts the group (male/female)
reduced2=function(u3,Group,name,lkm) {
c1=c()
ACMEMedian=c();ACMEpval=c();ACMEVar=c()
ADEMedian=c();ADEpval=c();ADEVar=c()
c1= u3 #[u3[,'ADE'] < ADEMedian & DV<ADEVar,] #& u3[,'z0.p']<ADEpval
ACMEMedian=0#median(c1[,'ACME'][c1[,'ACME']>0])
c1=c1[c1[,'ACME']>ACMEMedian & ((c1[,'ACME']-c1[,'ADE']) > 0), ]
c1=c1[rev(order(c1[,'ACME'])),];
c1=tryCatch({c1[1:lkm,]}, error = function(msg){return(c1)})
write.xlsx(c1, file = paste(name,Group,date,'.xlsx'), append = FALSE, row.names = TRUE)
return(c1)}
plottings_sf=function(uh7ma,date,sick,Group) {
rt2=uh7ma #[,1:17]# rtot=rtot[,1:17]# rtot=data.frame(rtot) # name=paste(simss,'basic hypothesis',take)# https://stat.ethz.ch/R-manual/R-devel/library/stats/html/p.adjust.html# https://www.middleprofessor.com/files/applied-biostatistics_bookdown/_book/adding-covariates-to-a-linear-model# https://github.com/MarioniLab/miloR# https://www.nature.com/articles/s41467-023-40458-9/figures/4
name=paste('Contaminants_Steroids_BAs_or_Lipids_sims',date) # rtot=rtot_2000_mrct # rtot=uh5
hoi=c();
hoi=scan(text=rt2[,1] , what=" ")#rownames(rt2)# names(rt2[,1]) rownames(rt2)
hoi=as.data.frame(matrix(hoi, ncol = 3, byrow = TRUE), stringsAsFactors = FALSE)
colnames(hoi)=c('Contaminants','Steroids','Bile Acids or Lipids')#,'Gender') ## https://stats.stackexchange.com/questions/282155/causal-mediation-analysis-negative-indirect-and-total-effect-positive-direct# https://www.researchgate.net/post/How_can_I_interpret_a_negative_indirect_effect_for_significant_mediation# https://stackoverflow.com/questions/31518150/gsub-in-r-is-not-replacing-dot replacing dot
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH.P4']='17a-OHP4'
hoi[,'Steroids' ][hoi[,'Steroids' ]=='17aOH-P4']='17a-OHP4'
hoi[,'Steroids' ] <- gsub("\\.", "-", hoi[,'Steroids' ] ) #:)
hoi[,'Steroids' ][ hoi[,'Steroids' ]=='T-Epi-T']='T/Epi-T'
df2 <- hoi %>%make_long('Contaminants','Steroids','Bile Acids or Lipids')
#In case you need to print to computer: meda='Sankey plot of ', pdf(paste(meda,name,sick,Group,".pdf"), width = 20, height = 20, pointsize = 18);
# print(ggplot(df2, aes(x = x, next_x = next_x, node = node, next_node = next_node,fill = factor(node),label = node)) +geom_sankey(flow.alpha = 0.5, node.color = 1) + geom_sankey_label(size = 5.5, color = 1, fill = "white") +scale_fill_viridis_d() + theme_sankey(base_size = 30) + theme(legend.position = "none")+
#theme(axis.title.x = element_blank()));dev.off()
windowsFonts(A = windowsFont("Calibri (Body)"))
p=ggplot(df2, aes(x = x, next_x = next_x, node = node, next_node = next_node,fill = factor(node),label = node)) +
geom_sankey(flow.alpha = 1, node.color = 1) +
geom_sankey_label(size = 3.0, color = 1, fill = "white") +
# scale_fill_viridis_d(option = "D", alpha = 0.95) +
theme_sankey(base_size = 24) +
scale_fill_grey(start = 0.5, end = 0.5)+
theme(axis.text.x = element_text(hjust = 0.5, vjust=7,colour = 'black'))+ #https://stackoverflow.com/questions/38862303/customize-ggplot2-axis-labels-with-different-colors
theme(legend.position = "none") +
theme(axis.title.x = element_blank())
library(ragg)
# Oh! https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
# https://r4ds.had.co.nz/graphics-for-communication.html#figure-sizing
path="C:/Users/patati/Documents/GitHub/new/" #oh, classical: https://forum.posit.co/t/r-markdown-html-document-doesnt-show-image/41629/2
pngfile <- fs::path(path,paste0(Group,'e',".png"))#fs::path(knitr::fig_path(), "theming2.png")
agg_png(pngfile, width = 60, height = 36, units = "cm", res = 300,scaling = 2)
plot(p)
invisible(dev.off())
knitr::include_graphics(pngfile)
# install.packages(c(ragg))
# https://www.tidyverse.org/blog/2020/08/taking-control-of-plot-scaling/
}
library(readxl)
setwd("C:/Users/patati/Desktop/TurkuOW/RWork/tests6/tests_basic/") #check this if needed...
all_all=read_xlsx(path = "100basic All tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #e on healthy :)
all_all=as.data.frame(all_all); all_all=all_all[!is.na(all_all$ACME),]; all_all=na.omit(all_all); all_all1=all_all
all_Female=read_xlsx(path = "100basic Female tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #e on healthy :)
all_Female=as.data.frame(all_Female); all_Female=all_Female[!is.na(all_Female$ACME),]; all_Female=na.omit(all_Female);
all_Male=read_xlsx(path = "100basic Male tikka3624 .xlsx") #total Male tikka76524 hyp4b_oki.xlsx") #e on healthy :)
all_Male=as.data.frame(all_Male); all_Male=all_Male[!is.na(all_Male$ACME),]; all_Male=na.omit(all_Male);
sick='all samples'
lkm=30;Group='All'; name='just all';date='tikka6924_alla'
alma=reduced2(all_all1,Group,name,lkm);
plottings_sf(alma,date,sick,Group)date='tikka6924_femala';name='just all';Group='female';
almaf=reduced2(all_Female,Group,name,lkm); #all_Female
almaf = na.omit(almaf)
plottings_sf(almaf,date,sick,Group)date='tikka6924_mala';name='just all';Group='male';
almam=reduced2(all_Male,Group,name,lkm);
almam = na.omit(almam) #https://www.tutorialspoint.com/how-to-remove-rows-from-data-frame-in-r-that-contains-nan
plottings_sf(almam,date,sick,Group)